{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Execute main.py inside this notebook's namespace. The constants and classes\n",
    "# used by the cells below are expected to come from it (assumption: main.py is\n",
    "# not visible from this notebook -- confirm there).\n",
    "%run main.py\n",
    "# Autoreload mode 2: re-import edited modules before each cell execution.\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "# Create the working directories; -p keeps this quiet when they already exist.\n",
    "!mkdir -p {NAVEC_DIR} {MODEL_DIR} {PACK_DIR}\n",
    "# S3 client used below to download the model files and upload the pack.\n",
    "s3 = S3()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetch the inputs needed to rebuild the model.\n",
    "#\n",
    "# A missing Navec file is treated as a fresh workspace, so everything is\n",
    "# downloaded (same as before). In addition, every S3 artifact is checked on its\n",
    "# own: previously a present Navec file suppressed all downloads, so a workspace\n",
    "# left half-populated by an interrupted run was never repaired.\n",
    "navec_missing = not exists(NAVEC)\n",
    "if navec_missing:\n",
    "    !wget {NAVEC_URL} -O {NAVEC}\n",
    "\n",
    "# (remote S3 key, local path) pairs, in the original download order.\n",
    "s3_artifacts = [\n",
    "    (S3_TAGS_VOCAB, TAGS_VOCAB),\n",
    "    (S3_MODEL_SHAPE, MODEL_SHAPE),\n",
    "    (S3_MODEL_ENCODER, MODEL_ENCODER),\n",
    "    (S3_MODEL_MORPH, MODEL_MORPH),\n",
    "]\n",
    "for remote, local in s3_artifacts:\n",
    "    if navec_missing or not exists(local):\n",
    "        s3.download(remote, local)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Navec embeddings; they also provide the word vocabulary.\n",
    "navec = Navec.load(NAVEC)\n",
    "\n",
    "# Vocabularies: words from Navec, shapes built here (PAD first), tags from disk.\n",
    "words_vocab = Vocab(navec.vocab.words)\n",
    "shapes_vocab = Vocab([PAD] + SHAPES)\n",
    "tags_vocab = Vocab.load(TAGS_VOCAB)\n",
    "\n",
    "# Assemble the network bottom-up: embeddings -> encoder -> tag head.\n",
    "word_emb = NavecEmbedding(navec)\n",
    "shape_emb = Embedding(vocab_size=len(shapes_vocab), dim=SHAPE_DIM, pad_id=shapes_vocab.pad_id)\n",
    "tag_emb = TagEmbedding(word_emb, shape_emb)\n",
    "tag_encoder = TagEncoder(input_dim=tag_emb.dim, layer_dims=LAYER_DIMS, kernel_size=KERNEL_SIZE)\n",
    "morph_head = MorphHead(tag_encoder.dim, len(tags_vocab))\n",
    "\n",
    "model = Morph(tag_emb, tag_encoder, morph_head)\n",
    "model.eval()\n",
    "\n",
    "# Restore the saved state of the shape embedding, the encoder and the head.\n",
    "# No file is loaded for the word embedding, which is built from Navec above.\n",
    "model.emb.shape.load(MODEL_SHAPE)\n",
    "model.encoder.load(MODEL_ENCODER)\n",
    "model.head.load(MODEL_MORPH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the model with to_exec(), strip Navec from it, then split the result\n",
    "# into its arrays and the remaining model object.\n",
    "# NOTE: `model` is rebound here, so re-running this cell would operate on the\n",
    "# already converted model rather than on the one built in the previous cell.\n",
    "exec_model = model.to_exec().strip_navec()\n",
    "arrays, model = exec_model.separate_arrays()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the pack: metadata first, then the model and its arrays, and finally\n",
    "# the three vocabularies, each under its own name.\n",
    "with DumpPack(PACK) as pack:\n",
    "    pack.dump_meta(Meta(ID))\n",
    "    pack.dump_model(model)\n",
    "    pack.dump_arrays(arrays)\n",
    "\n",
    "    for vocab, name in (\n",
    "        (words_vocab, WORD),\n",
    "        (shapes_vocab, SHAPE),\n",
    "        (tags_vocab, TAG),\n",
    "    ):\n",
    "        pack.dump_vocab(vocab, name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Smoke test: load the pack that was just written, hand it the Navec\n",
    "# embeddings (the packed model had them stripped), and tag a short sentence.\n",
    "tagger = api.Morph.load(PACK)\n",
    "tagger.navec(navec)\n",
    "\n",
    "sample = 'Ежедневно очаги коронавирусной инфекции'.split()\n",
    "show_morph_markup(tagger(sample))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload the pack written above (PACK) to S3 under S3_PACK.\n",
    "s3.upload(PACK, S3_PACK)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
